I. Motivating question

Using datasets I generated at several difficulty levels, I want to use machine learning to find the model that fits each one best.

II. Focus problem

  • Visualization of the data after dimension reduction

  • Performance of the different learning machines

III. Data and EDA

The data come from my Shiny apps.

Below are the packages used:

library(APLM)
library(magrittr)
library(purrr)
library(ggplot2)
library(gridExtra)
library(mvtnorm)
library(MASS)
library(umap)
library(Rtsne)
library(nnet)
library(DT)
library(corrplot)
library(RColorBrewer)
library(rpart)
library(rpart.plot)
library(GGally)
library(randomForest)
library(xgboost)
library(knitr)

A random seed is set when generating the data so that the results are reproducible.

set.seed(611011106)
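The actual generating code lives inside the Shiny app and is not reproduced here. As a rough sketch of what a Level 1-style dataset could look like (three multivariate normal groups in four dimensions, using the mvtnorm package loaded above; the group sizes, means, and covariances are illustrative assumptions, not the app's settings):

# Hypothetical generator for a three-group, four-dimensional Gaussian mixture
make_demo_data <- function(n_per_group = 100) {
  mu <- list(c(0, 0, 0, 0), c(3, 3, 0, 0), c(0, 3, 3, 3))
  X <- do.call(rbind,
               lapply(mu, function(m) rmvnorm(n_per_group, mean = m, sigma = diag(4))))
  data.frame(group = rep(1:3, each = n_per_group), X)  # columns: group, X1..X4
}

demo_data <- make_demo_data()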

Level 1 data

Data.Frame


Pairs Plot


Dimension Reduction
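The projection figures are omitted from this text extract. A sketch of the kind of 2-D embedding presumably shown, using the Rtsne, umap, and ggplot2 packages loaded above (the data frame name and plot styling are assumptions):

# t-SNE: embed the four predictors in 2-D, colour points by true group
ts <- Rtsne(as.matrix(geasy_data[, -1]), perplexity = 30)
qplot(ts$Y[, 1], ts$Y[, 2], colour = factor(geasy_data$group),
      xlab = "tSNE-1", ylab = "tSNE-2")

# UMAP: same idea with a different neighbourhood-based embedding
um <- umap(as.matrix(geasy_data[, -1]))
qplot(um$layout[, 1], um$layout[, 2], colour = factor(geasy_data$group),
      xlab = "UMAP-1", ylab = "UMAP-2")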



Level 2 data

Data.Frame


Pairs Plot


Dimension Reduction


Level 3 data

Data.Frame


Pairs Plot


Dimension Reduction


Level 4 data

Data.Frame


Pairs Plot


Dimension Reduction


IV. Models

Before fitting any models, I split each dataset with K-fold cross-validation (K = 5):

easytag <- kfcv(data=easy_data, seed=611011106, k=5)

midmtag <- kfcv(data=midm_data, seed=611011106, k=5)

midptag <- kfcv(data=midp_data, seed=611011106, k=5)

hardtag <- kfcv(data=hard_data, seed=611011106, k=5)
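kfcv() is provided by the APLM package loaded above. For readers without that package, a minimal stand-in consistent with how the tags are used below (each list element holds the row indices of one test fold) might be:

# Partition 1:nrow(data) into k random, roughly equal folds
kfcv <- function(data, seed, k = 5) {
  set.seed(seed)
  idx <- sample(nrow(data))              # shuffle all row indices
  split(idx, rep_len(1:k, nrow(data)))   # deal them into k folds
}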

Level 1 data

LDA

success <- 0
lda_tb <- list()

for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]   # training folds
  test <- geasy_data[easytag[[i]],]     # held-out fold
  
  plda <- lda(group~., train)
  
  p <- predict(plda, test[,-1])$class   # predicted class
  r <- test$group                       # true class
  
  lda_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 18  0  0
##   2  1 24  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  0  0
##   2  0 24  0
##   3  0  0 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 99 %
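The percentage under each confusion-matrix list is presumably the pooled accuracy over all five test folds, i.e. something like:

round(100 * success / nrow(geasy_data), 2)   # pooled CV accuracy in %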

Logistic Regression

success <- 0
lg_tb <- list()

for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]
  test <- geasy_data[easytag[[i]],]
  
  lg <- multinom(group~., train)
  
  p <- predict(lg, test[,-1])
  r <- test$group
  
  lg_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ ., data = train)
## 
## Coefficients:
##   (Intercept)         X1        X2       X3       X4
## 2   -76.06613 -0.9971579 15.190423 1.797816 2.108939
## 3   -77.17384  8.0086608 -1.785702 5.205221 5.976934
## 
## Std. Errors:
##   (Intercept)        X1        X2        X3        X4
## 2    159.9742  16.28883  49.90668  44.65194  16.27796
## 3   2405.1956 139.43649 377.00929 296.00723 447.05320
## 
## Residual Deviance: 0.01058902 
## AIC: 20.01059
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  1
##   3  0  0 28
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 18  1  0
##   2  1 23  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 22  0  0
##   2  0 24  0
##   3  1  0 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 98 %

Logistic Regression (PCA)


Below are the cumulative proportions of variance explained by the principal components.

PC1 alone explains only 58.28% of the variance.

Adding PC2 raises this to 80.39%.
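These cumulative proportions can be read directly off the prcomp fit; a sketch, run here on the full data rather than per fold:

Pca <- prcomp(geasy_data[, -1], center = TRUE, scale. = TRUE)
cumsum(Pca$sdev^2) / sum(Pca$sdev^2)   # cumulative proportion of variance explained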


success <- 0
lgpc_tb <- list()
for(i in 1:5){
  train <- geasy_data[-easytag[[i]], ]
  test  <- geasy_data[easytag[[i]], ]
  
  Pca <- prcomp(train[,-1], center=TRUE, scale.=TRUE)
  
  ptrain <- Pca[["x"]]
  ptrain <- data.frame(geasy_data$group[-easytag[[i]]], ptrain)
  colnames(ptrain)[1] <- "group"

  lg <- multinom(group~PC1+PC2, data = ptrain)
  
  ptest <- predict(Pca, test[,-1])
  p <- predict(lg, ptest)
  r <- geasy_data$group[easytag[[i]]]
  
  lgpc_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ PC1 + PC2, data = ptrain)
## 
## Coefficients:
##   (Intercept)       PC1        PC2
## 2   -20.97123 -10.78459 63.6664890
## 3   -13.86168  35.26689  0.6224278
## 
## Std. Errors:
##   (Intercept)       PC1       PC2
## 2    235.7793  160.3935  459.7122
## 3    693.9337 1497.9336 3055.8864
## 
## Residual Deviance: 0.0004086614 
## AIC: 12.00041
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 19  0  0
##   2  0 24  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  1  0
##   2  0 23  0
##   3  0  0 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  0  0
##   2  1 21  0
##   3  0  0 21
## 99.33 %

CART

success <- 0
rt_tb <- list()

for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]
  test <- geasy_data[easytag[[i]],]
  
  rt <- rpart(group ~ ., data = train)

  p <- predict(rt, test)          # matrix of class probabilities
  p <- apply(p, 1, which.max)     # most probable class per test row
  r <- test$group
  
  rt_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}

## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 19  0  0
##   2  0 20  0
##   3  1  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 18  0  1
##   2  1 24  0
##   3  0  0 16
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  0  3
##   2  0 24  0
##   3  0  0 10
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 97.33 %

Random Forest

success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]
  test <- geasy_data[easytag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train)
  
  p <- predict(rf, test[,-1])   # predict with the fitted forest
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 19  0  0
##   2  0 24  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  0  0
##   2  0 24  0
##   3  0  0 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 99.33 %

success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]
  test <- geasy_data[easytag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train, ntree=50, mtry=1)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 19  0  0
##   2  0 24  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  0  0
##   2  0 24  0
##   3  0  0 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 99.33 %
rf$importance
##    MeanDecreaseGini
## X1         47.54109
## X2         65.85372
## X3         20.41197
## X4         25.47672

XGBoost

geasy_data$group <- as.integer(geasy_data$group)-1  # multi:softmax expects 0-based labels
success <- 0
xgb_tb <- list()
for(i in 1:5){
  train <- geasy_data[-easytag[[i]],]
  test <- geasy_data[easytag[[i]],]
  
  dtrain <- xgb.DMatrix(data = as.matrix(train[,-1]),
                       label = train$group)
  dtest <- xgb.DMatrix(data = as.matrix(test[,-1]),
                       label = test$group)
  
  xgb.model <- xgboost(data = dtrain, 
                       objective='multi:softmax',
                       num_class=3,
                       print_every_n = 100,
                       max_depth = 1,   
                       eta = 0.3,    
                       gamma = 0,    
                       subsample = 1,   
                       colsample_bytree = 1,  
                       min_child_weight = 12,
                       nrounds = 30)

  # To draw every decision tree in the boosted model, use the function below
  # (skipped here because there would be too many trees):
  # xgb.plot.tree(model = xgb.model) 

  # Prediction: shift the 0-based labels back to 1-based classes
  p <- predict(xgb.model, dtest)+1
  r <- test$group+1
  xgb_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 11  0
##   3  0  0 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  0
##   2  0 20  0
##   3  0  0 20
## 
## [[3]]
##    r
## p    1  2  3
##   1 19  0  0
##   2  0 24  0
##   3  0  0 17
## 
## [[4]]
##    r
## p    1  2  3
##   1 23  0  1
##   2  0 24  0
##   3  0  0 12
## 
## [[5]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  1 20  0
##   3  0  0 21
## 99 %
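For comparison with the random-forest Gini importances reported above, xgboost can produce its own gain-based importance table; a sketch, not part of the original run:

xgb.importance(model = xgb.model)   # Gain / Cover / Frequency per feature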

Level 2 data

LDA

success <- 0
lda_tb <- list()

for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  
  plda <- lda(group~., train)
  
  p <- predict(plda, test[,-1])$class
  r <- test$group
  
  lda_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 16  2  0
##   2  3  8  0
##   3  1  1 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  1  0
##   2  0 18  1
##   3  0  1 19
## 
## [[3]]
##    r
## p    1  2  3
##   1 16  0  1
##   2  2 22  1
##   3  1  2 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  0  0
##   2  6 22  0
##   3  0  2 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  3  0
##   2  0 17  1
##   3  0  1 20
## 90 %

Logistic Regression

success <- 0
lg_tb <- list()

for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  
  lg <- multinom(group~., train)
  
  p <- predict(lg, test[,-1])
  r <- test$group
  
  lg_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ ., data = train)
## 
## Coefficients:
##   (Intercept)        X1       X2        X3        X4
## 2   -7.761984 -1.553083 1.767898  0.256090 0.2545674
## 3  -16.005019  1.597211 2.682378 -1.255145 1.3873693
## 
## Std. Errors:
##   (Intercept)        X1        X2        X3        X4
## 2    1.847236 0.3093902 0.3173634 0.2786558 0.2539864
## 3    3.350834 0.5032890 0.4730883 0.4704229 0.3918130
## 
## Residual Deviance: 134.3401 
## AIC: 154.3401
## [[1]]
##    r
## p    1  2  3
##   1 16  2  0
##   2  3  8  1
##   3  1  1 28
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  1  1
##   2  0 18  1
##   3  0  1 18
## 
## [[3]]
##    r
## p    1  2  3
##   1 15  1  1
##   2  3 22  1
##   3  1  1 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  1  0
##   2  6 22  0
##   3  0  1 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  3  0
##   2  0 17  1
##   3  0  1 20
## 89 %

Logistic Regression (PCA)


Below are the cumulative proportions of variance explained by the principal components.

PC1 alone explains only 39.75% of the variance.

Adding PC2 raises this to 66.96%.

Adding PC3 raises this to 86.31%.


success <- 0
lgpc_tb <- list()
for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]], ]
  test  <- gmidm_data[midmtag[[i]], ]
  
  Pca <- prcomp(train[,-1], center=TRUE, scale.=TRUE)
  
  ptrain <- Pca[["x"]]
  ptrain <- data.frame(gmidm_data$group[-midmtag[[i]]], ptrain)
  colnames(ptrain)[1] <- "group"

  lg <- multinom(group~PC1+PC2+PC3, data = ptrain)
  
  ptest <- predict(Pca, test[-1])
  p <- predict(lg, ptest)
  r <- gmidm_data$group[midmtag[[i]]]
  
  lgpc_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ PC1 + PC2 + PC3, data = ptrain)
## 
## Coefficients:
##   (Intercept)        PC1        PC2        PC3
## 2  -0.2916505 -0.3613469 -3.3651139  1.1716770
## 3  -0.9192512 -3.8961902 -0.6990297 -0.2482348
## 
## Std. Errors:
##   (Intercept)       PC1       PC2       PC3
## 2   0.3916937 0.3347252 0.5172701 0.3449087
## 3   0.4983293 0.6204812 0.4658587 0.4378014
## 
## Residual Deviance: 153.1981 
## AIC: 169.1981
## [[1]]
##    r
## p    1  2  3
##   1 16  3  1
##   2  3  7  0
##   3  1  1 28
## 
## [[2]]
##    r
## p    1  2  3
##   1 19  1  1
##   2  0 19  1
##   3  1  0 18
## 
## [[3]]
##    r
## p    1  2  3
##   1 14  1  1
##   2  3 22  1
##   3  2  1 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  1  1
##   2  6 22  0
##   3  0  1 12
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  3  0
##   2  0 17  2
##   3  0  1 19
## 87.67 %

Including PC3 gives about 1 percentage point higher prediction accuracy than leaving it out.


CART

success <- 0
rt_tb <- list()

for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  
  rt <- rpart(group ~ ., data = train)

  p <- predict(rt, test)
  p <- apply(p, 1, which.max)   # most probable class per test row
  r <- test$group
  
  rt_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}

## [[1]]
##    r
## p    1  2  3
##   1 13  1  4
##   2  4  9  2
##   3  3  1 23
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  0  2
##   2  0 19  3
##   3  0  1 15
## 
## [[3]]
##    r
## p    1  2  3
##   1  9  0  0
##   2  4 20  1
##   3  6  4 16
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  5  0
##   2  4 15  0
##   3  2  4 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  7  1
##   2  0 14  1
##   3  0  0 19
## 80 %

Random Forest

success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 16  2  0
##   2  3  8  0
##   3  1  1 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  1  1
##   2  0 18  1
##   3  0  1 18
## 
## [[3]]
##    r
## p    1  2  3
##   1 16  1  1
##   2  2 20  1
##   3  1  3 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  2  0
##   2  6 20  0
##   3  0  2 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  3  0
##   2  0 17  1
##   3  0  1 20
## 88.33 %

tuneRF(train[,-1], train[,1])
## mtry = 2  OOB error = 14.17% 
## Searching left ...
## mtry = 1     OOB error = 15% 
## -0.05882353 0.05 
## Searching right ...
## mtry = 4     OOB error = 16.25% 
## -0.1470588 0.05

##       mtry  OOBError
## 1.OOB    1 0.1500000
## 2.OOB    2 0.1416667
## 4.OOB    4 0.1625000

tuneRF steps outward from the default mtry and reports the out-of-bag error at each candidate; mtry = 2 has the lowest OOB error here, so it is used in the refit below.
success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train, ntree=100, mtry=2)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 16  2  0
##   2  3  8  0
##   3  1  1 29
## 
## [[2]]
##    r
## p    1  2  3
##   1 20  1  1
##   2  0 18  1
##   3  0  1 18
## 
## [[3]]
##    r
## p    1  2  3
##   1 16  1  1
##   2  2 20  1
##   3  1  3 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  2  0
##   2  6 20  0
##   3  0  2 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  3  0
##   2  0 17  1
##   3  0  1 20
## 88.33 %
rf$importance
##    MeanDecreaseGini
## X1         60.33646
## X2         58.45880
## X3         19.16287
## X4         21.39738

XGBoost

gmidm_data$group <- as.integer(gmidm_data$group)-1  # multi:softmax expects 0-based labels
success <- 0
xgb_tb <- list()
for(i in 1:5){
  train <- gmidm_data[-midmtag[[i]],]
  test <- gmidm_data[midmtag[[i]],]
  
  dtrain <- xgb.DMatrix(data = as.matrix(train[,-1]),
                       label = train$group)
  dtest <- xgb.DMatrix(data = as.matrix(test[,-1]),
                       label = test$group)
  
  xgb.model <- xgboost(data = dtrain, 
                       objective='multi:softmax',
                       num_class=3,
                       print_every_n = 100,
                       max_depth = 3,   
                       eta = 0.3,    
                       gamma = 0,    
                       subsample = 1,   
                       colsample_bytree = 1,  
                       min_child_weight = 12,
                       nrounds = 150)

  # To draw every decision tree in the boosted model, use the function below
  # (skipped here because there would be too many trees):
  # xgb.plot.tree(model = xgb.model) 

  # Prediction: shift the 0-based labels back to 1-based classes
  p <- predict(xgb.model, dtest)+1
  r <- test$group+1
  xgb_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3
##   1 16  2  1
##   2  3  8  0
##   3  1  1 28
## 
## [[2]]
##    r
## p    1  2  3
##   1 19  0  1
##   2  1 17  2
##   3  0  3 17
## 
## [[3]]
##    r
## p    1  2  3
##   1 16  1  1
##   2  2 20  1
##   3  1  3 15
## 
## [[4]]
##    r
## p    1  2  3
##   1 17  0  0
##   2  6 21  0
##   3  0  3 13
## 
## [[5]]
##    r
## p    1  2  3
##   1 18  5  0
##   2  0 15  1
##   3  0  1 20
## 86.67 %

Level 3 data

LDA

success <- 0
lda_tb <- list()

for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  
  plda <- lda(group~., train)
  
  p <- predict(plda, test[,-1])$class
  r <- test$group
  
  lda_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1 10  3  3  0
##   2  2  9  1  5
##   3  2  0 20  0
##   4  2  5  2 16
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 15  6  1  1
##   2  2 16  0  1
##   3  1  1 11  0
##   4  2  3  1 19
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 16  0  1  0
##   2  3 13  3  4
##   3  4  0 15  0
##   4  1  1  1 18
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 13  1  0  0
##   2  3 14  2  3
##   3  4  0 17  0
##   4  0  7  0 16
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 10  0  1  0
##   2  2 21  0  4
##   3  4  0 21  0
##   4  4  0  0 13
## 75.75 %

Logistic Regression

success <- 0
lg_tb <- list()

for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  
  lg <- multinom(group~., train)
  
  p <- predict(lg, test[,-1])
  r <- test$group
  
  lg_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ ., data = train)
## 
## Coefficients:
##   (Intercept)         X1         X2         X3         X4       X5
## 2   -1.152588 -1.0643308 -1.0409963  1.7034721 -1.7457894 0.484605
## 3    7.218486 -0.4645813 -0.5225255 -0.9869686 -0.1287672 1.272399
## 4   -6.828936  0.1667986 -1.8088136  2.5009878 -3.6524196 1.212267
## 
## Std. Errors:
##   (Intercept)        X1        X2        X3        X4        X5
## 2    1.934027 0.2434073 0.2357610 0.3729507 0.2972693 0.2162955
## 3    1.862796 0.2246910 0.2266733 0.2535173 0.2255872 0.2375589
## 4    2.468516 0.2902900 0.3186631 0.4728831 0.4449898 0.2865967
## 
## Residual Deviance: 394.2037 
## AIC: 430.2037
## [[1]]
##    r
## p    1  2  3  4
##   1 10  2  3  1
##   2  2 11  0  6
##   3  2  0 21  0
##   4  2  4  2 14
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 17  7  1  3
##   2  2 15  0  1
##   3  1  1 11  1
##   4  0  3  1 16
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 15  0  1  0
##   2  4 13  2  3
##   3  4  0 15  0
##   4  1  1  2 19
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 14  1  3  1
##   2  2 16  1  3
##   3  4  0 15  0
##   4  0  5  0 15
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 10  2  1  1
##   2  3 19  0  3
##   3  4  0 21  0
##   4  3  0  0 13
## 75 %

Logistic Regression (PCA)


Below are the cumulative proportions of variance explained by the principal components.

PC1 alone explains only 35.29% of the variance.

Adding PC2 raises this to 61.51%.

Adding PC3 raises this to 78.53%.


success <- 0
lgpc_tb <- list()
for(i in 1:5){
  train <- gmidp_data[-midptag[[i]], ]
  test  <- gmidp_data[midptag[[i]], ]
  
  Pca <- prcomp(train[,-1], center=TRUE, scale.=TRUE)
  
  ptrain <- Pca[["x"]]
  ptrain <- data.frame(gmidp_data$group[-midptag[[i]]], ptrain)
  colnames(ptrain)[1] <- "group"

  lg <- multinom(group~PC1+PC2+PC3, data = ptrain)
  
  ptest <- predict(Pca, test[-1])
  p <- predict(lg, ptest)
  r <- gmidp_data$group[midptag[[i]]]
  
  lgpc_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ PC1 + PC2 + PC3, data = ptrain)
## 
## Coefficients:
##   (Intercept)        PC1       PC2        PC3
## 2  -0.1090491  0.4887995 1.2758641 -0.1905764
## 3  -1.1887818 -2.0702048 0.3645244  0.8129034
## 4  -0.9699945  1.3851039 1.6232281  1.7938276
## 
## Std. Errors:
##   (Intercept)       PC1      PC2       PC3
## 2   0.2331447 0.2279549 0.201134 0.2236235
## 3   0.3503022 0.2981167 0.229690 0.2854826
## 4   0.3226705 0.2947342 0.241147 0.3178975
## 
## Residual Deviance: 488.4273 
## AIC: 512.4273
## [[1]]
##    r
## p    1  2  3  4
##   1 10  5  3  1
##   2  3  3  2  6
##   3  2  0 21  0
##   4  1  9  0 14
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 12  8  1  3
##   2  4 13  0  3
##   3  1  1 12  1
##   4  3  4  0 14
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 12  3  1  2
##   2  5  7  1  5
##   3  4  1 17  0
##   4  3  3  1 15
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 10  2  2  3
##   2  3 15  2  4
##   3  6  0 15  0
##   4  1  5  0 12
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 10  3  0  1
##   2  2 17  0  2
##   3  1  0 22  0
##   4  7  1  0 14
## 66.25 %

CART

success <- 0
rt_tb <- list()

for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  
  rt <- rpart(group ~ ., data = train)

  p <- predict(rt, test)
  p <- apply(p, 1, which.max)   # most probable class per test row
  r <- test$group
  
  rt_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}

## [[1]]
##    r
## p    1  2  3  4
##   1 10  3  5  6
##   2  4  7  2  6
##   3  1  0 19  0
##   4  1  7  0  9
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 14  5  3  4
##   2  5 14  0  1
##   3  0  1  9  1
##   4  1  6  1 15
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 11  1  1  4
##   2  5 10  3  1
##   3  4  0 14  2
##   4  4  3  2 15
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 11  0  3  4
##   2  2 15  1  1
##   3  6  0 15  0
##   4  1  7  0 14
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 13  4  2  0
##   2  1 17  0  5
##   3  0  0 20  0
##   4  6  0  0 12
## 66 %

Random Forest

success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1  9  3  3  0
##   2  2  9  0  4
##   3  3  0 22  0
##   4  2  5  1 17
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 15  3  1  1
##   2  2 19  0  1
##   3  1  1 11  0
##   4  2  3  1 19
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 15  0  1  0
##   2  3 13  1  4
##   3  4  0 16  0
##   4  2  1  2 18
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 15  0  1  0
##   2  2 18  1  3
##   3  3  0 17  0
##   4  0  4  0 16
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 10  0  1  0
##   2  2 21  0  4
##   3  4  0 21  0
##   4  4  0  0 13
## 78.5 %
layout(matrix(c(1,2), nrow=1), widths=c(4,1))
par(mar=c(5,4,4,0))  # no margin on the right side
plot(rf)
par(mar=c(5,0,4,2))  # no margin on the left side
plot(c(0,1), type="n", axes=FALSE, xlab="", ylab="")
legend("top", colnames(rf$err.rate), col=1:4, cex=0.8, fill=1:4)

tuneRF(train[,-1], train[,1])
## mtry = 2  OOB error = 27.19% 
## Searching left ...
## mtry = 1     OOB error = 29.69% 
## -0.09195402 0.05 
## Searching right ...
## mtry = 4     OOB error = 28.44% 
## -0.04597701 0.05

##       mtry OOBError
## 1.OOB    1 0.296875
## 2.OOB    2 0.271875
## 4.OOB    4 0.284375
success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train, ntree=100, mtry=2)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1  9  3  3  0
##   2  2  9  0  4
##   3  3  0 22  0
##   4  2  5  1 17
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 15  3  1  1
##   2  2 19  0  1
##   3  1  1 11  0
##   4  2  3  1 19
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 15  0  1  0
##   2  3 13  1  4
##   3  4  0 16  0
##   4  2  1  2 18
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 15  0  1  0
##   2  2 18  1  3
##   3  3  0 17  0
##   4  0  4  0 16
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 10  0  1  0
##   2  2 21  0  4
##   3  4  0 21  0
##   4  4  0  0 13
## 78.5 %
rf$importance
##    MeanDecreaseGini
## X1         40.36386
## X2         32.13683
## X3         47.27555
## X4         63.31641
## X5         56.05916

XGBoost

gmidp_data$group <- as.integer(gmidp_data$group)-1  # multi:softmax expects 0-based labels
success <- 0
xgb_tb <- list()
for(i in 1:5){
  train <- gmidp_data[-midptag[[i]],]
  test <- gmidp_data[midptag[[i]],]
  
  dtrain <- xgb.DMatrix(data = as.matrix(train[,-1]),
                       label = train$group)
  dtest <- xgb.DMatrix(data = as.matrix(test[,-1]),
                       label = test$group)
  
  xgb.model <- xgboost(data = dtrain, 
                       objective='multi:softmax',
                       num_class=4,
                       print_every_n = 100,
                       max_depth = 1,   
                       eta = 0.3,    
                       gamma = 0,    
                       subsample = 1,   
                       colsample_bytree = 1,  
                       min_child_weight = 12,
                       nrounds = 150)

  # To draw every decision tree in the boosted model, use the function below
  # (skipped here because there would be too many trees):
  # xgb.plot.tree(model = xgb.model) 

  # Prediction: shift the 0-based labels back to 1-based classes
  p <- predict(xgb.model, dtest)+1
  r <- test$group+1
  xgb_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1 10  3  4  1
##   2  2  9  0  9
##   3  3  0 21  0
##   4  1  5  1 11
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 16  7  5  1
##   2  2 15  1  1
##   3  1  1  7  1
##   4  1  3  0 18
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 14  0  2  1
##   2  6 12  2  0
##   3  3  0 15  0
##   4  1  2  1 21
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 11  1  2  1
##   2  1 17  2  2
##   3  7  1 15  1
##   4  1  3  0 15
## 
## [[5]]
##    r
## p    1  2  3  4
##   1  9  0  1  1
##   2  2 21  0  1
##   3  5  0 21  1
##   4  4  0  0 14
## 73 %

Level 4 data

LDA

success <- 0
lda_tb <- list()

for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  
  plda <- lda(group~., train)
  
  p <- predict(plda, test[,-1])$class
  r <- test$group
  
  lda_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1  9  1  1  0
##   2  6 13  0  4
##   3  1  2 25  0
##   4  0  1  0 17
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 13 10  1  0
##   2  3 10  0  4
##   3  2  0 12  0
##   4  2  6  0 17
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 13  2  0  0
##   2  4 11  0  3
##   3  4  0 20  0
##   4  3  1  0 19
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 11  4  4  0
##   2  6 11  0  3
##   3  2  0 15  0
##   4  1  7  0 16
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 11  4  4  0
##   2  5  8  0  0
##   3  2  1 18  0
##   4  2  8  0 17
## 71.5 %

Logistic Regression

success <- 0
lg_tb <- list()

for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  
  lg <- multinom(group~., train)
  
  p <- predict(lg, test[,-1])
  r <- test$group
  
  lg_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ ., data = train)
## 
## Coefficients:
##   (Intercept)         X1        X2        X3         X4        X5
## 2   -6.732855  0.5353301 -1.851090  1.704793 -0.3371828 0.3845232
## 3   15.979502 -0.2248725  2.324965 -4.658457  1.3145202 0.8509714
## 4  -24.419680  2.4504441 -5.384352  5.057537 -2.8939583 2.0507335
## 
## Std. Errors:
##   (Intercept)        X1        X2        X3        X4        X5
## 2    1.643493 0.2069970 0.3141770 0.3322866 0.2141052 0.2389116
## 3    3.258931 0.3565227 0.7011019 0.9105876 0.4043250 0.3930075
## 4    3.742443 0.3864056 0.7210244 0.7145816 0.4869484 0.4364483
## 
## Residual Deviance: 365.196 
## AIC: 401.196
## [[1]]
##    r
## p    1  2  3  4
##   1  9  2  1  0
##   2  6 12  0  4
##   3  1  2 25  0
##   4  0  1  0 17
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 13 11  1  0
##   2  4 10  0  4
##   3  2  0 12  0
##   4  1  5  0 17
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 13  1  0  0
##   2  4 11  0  3
##   3  4  0 20  0
##   4  3  2  0 19
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 14  6  4  0
##   2  5  9  0  4
##   3  1  0 15  0
##   4  0  7  0 15
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 11  6  4  0
##   2  6  9  0  1
##   3  2  0 18  0
##   4  1  6  0 16
## 71.25 %

Logistic Regression (PCA)


Below are the cumulative proportions of variance explained by the principal components.

PC1 alone explains only 36.20% of the variance.

Adding PC2 raises this to 64.17%.

Adding PC3 raises this to 82.13%.


success <- 0
lgpc_tb <- list()
for(i in 1:5){
  train <- ghard_data[-hardtag[[i]], ]
  test  <- ghard_data[hardtag[[i]], ]
  
  Pca <- prcomp(train[,-1], center=TRUE, scale.=TRUE)
  
  ptrain <- Pca[["x"]]
  ptrain <- data.frame(ghard_data$group[-hardtag[[i]]], ptrain)
  colnames(ptrain)[1] <- "group"

  lg <- multinom(group~PC1+PC2+PC3, data = ptrain)
  
  ptest <- predict(Pca, test[,-1])
  p <- predict(lg, ptest)
  r <- test$group   # true labels of the held-out fold
  
  lgpc_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## Call:
## multinom(formula = group ~ PC1 + PC2 + PC3, data = ptrain)
## 
## Coefficients:
##   (Intercept)        PC1         PC2         PC3
## 2   0.0213224 -0.1896045 -0.04104361 -0.06122511
## 3  -1.0211892 -1.7969429 -0.69353177  1.92821923
## 4  -0.0963046 -0.0516834  0.64206426  1.01283193
## 
## Std. Errors:
##   (Intercept)       PC1       PC2       PC3
## 2   0.1898186 0.1449818 0.1473594 0.1832821
## 3   0.2953453 0.2555225 0.2257196 0.3140569
## 4   0.2024693 0.1530437 0.1720265 0.2156909
## 
## Residual Deviance: 647.8498 
## AIC: 671.8498
## [[1]]
##    r
## p    1  2  3  4
##   1  6  8  0  3
##   2  5  3  2  2
##   3  4  4 22  5
##   4  1  2  2 11
## 
## [[2]]
##    r
## p    1  2  3  4
##   1  7 13  0  3
##   2  1  5  2  1
##   3  2  3 11  4
##   4 10  5  0 13
## 
## [[3]]
##    r
## p    1  2  3  4
##   1  6  3  0  1
##   2 12  8  1  4
##   3  3  2 15  2
##   4  3  1  4 15
## 
## [[4]]
##    r
## p    1  2  3  4
##   1  9  7  3  4
##   2  4  5  1  2
##   3  0  4 13  0
##   4  7  6  2 13
## 
## [[5]]
##    r
## p    1  2  3  4
##   1  5  7  0  2
##   2 11  7  4  1
##   3  1  2 15  2
##   4  3  5  3 12
## 50.25 %

CART

success <- 0
rt_tb <- list()

for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  
  rt <- rpart(group ~ ., data = train)

  p <- predict(rt, test)
  p <- apply(p, 1, which.max)   # most probable class per test row
  r <- test$group
  
  rt_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}

## [[1]]
##    r
## p    1  2  3  4
##   1 11  8  2  6
##   2  3  6  1  4
##   3  2  1 23  1
##   4  0  2  0 10
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 13 10  2  6
##   2  1  8  0  4
##   3  2  5 11  1
##   4  4  3  0 10
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 10  4  1  1
##   2  8  8  1 10
##   3  4  2 18  0
##   4  2  0  0 11
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 11  8  2  5
##   2  4  7  1  3
##   3  2  1 16  0
##   4  3  6  0 11
## 
## [[5]]
##    r
## p    1  2  3  4
##   1  9  3  2  3
##   2  7  4  2  1
##   3  1  1 15  1
##   4  3 13  3 12
## 56 %

Random Forest

success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1 10  4  1  0
##   2  5 12  0  3
##   3  1  0 25  0
##   4  0  1  0 18
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 13  6  1  0
##   2  4 14  0  3
##   3  1  0 12  0
##   4  2  6  0 18
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 13  2  0  0
##   2  4 10  0  1
##   3  4  0 20  0
##   4  3  2  0 21
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 12  4  4  0
##   2  6 11  0  2
##   3  1  0 15  0
##   4  1  7  0 17
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 11  4  4  0
##   2  5  8  0  0
##   3  2  1 18  0
##   4  2  8  0 17
## 73.75 %
layout(matrix(c(1,2), nrow=1), widths=c(4,1))
par(mar=c(5,4,4,0))  # no margin on the right side
plot(rf)
par(mar=c(5,0,4,2))  # no margin on the left side
plot(c(0,1), type="n", axes=FALSE, xlab="", ylab="")
legend("top", colnames(rf$err.rate), col=1:4, cex=0.8, fill=1:4)

tuneRF(train[,-1], train[,1])
## mtry = 2  OOB error = 33.44% 
## Searching left ...
## mtry = 1     OOB error = 38.44% 
## -0.1495327 0.05 
## Searching right ...
## mtry = 4     OOB error = 37.5% 
## -0.1214953 0.05

##       mtry OOBError
## 1.OOB    1 0.384375
## 2.OOB    2 0.334375
## 4.OOB    4 0.375000
success <- 0
rf_tb <- list()

for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  train$group <- train$group %>% as.factor()
  
  rf <- randomForest(group~., train, ntree=150, mtry=2)
  
  p <- predict(rf, test[,-1])
  r <- test$group
  
  rf_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1 10  4  1  0
##   2  5 12  0  3
##   3  1  0 25  0
##   4  0  1  0 18
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 13  6  1  0
##   2  4 14  0  3
##   3  1  0 12  0
##   4  2  6  0 18
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 13  2  0  0
##   2  4 10  0  1
##   3  4  0 20  0
##   4  3  2  0 21
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 12  4  4  0
##   2  6 11  0  2
##   3  1  0 15  0
##   4  1  7  0 17
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 11  4  4  0
##   2  5  8  0  0
##   3  2  1 18  0
##   4  2  8  0 17
## 73.75 %
rf$importance
##    MeanDecreaseGini
## X1         39.32964
## X2         45.70967
## X3         76.47191
## X4         41.88933
## X5         35.88388

XGBoost

ghard_data$group <- as.integer(ghard_data$group)-1  # multi:softmax expects 0-based labels
success <- 0
xgb_tb <- list()
for(i in 1:5){
  train <- ghard_data[-hardtag[[i]],]
  test <- ghard_data[hardtag[[i]],]
  
  dtrain <- xgb.DMatrix(data = as.matrix(train[,-1]),
                       label = train$group)
  dtest <- xgb.DMatrix(data = as.matrix(test[,-1]),
                       label = test$group)
  
  xgb.model <- xgboost(data = dtrain, 
                       objective='multi:softmax',
                       num_class=4,
                       print_every_n = 100,
                       max_depth = 2,   
                       eta = 0.3,    
                       gamma = 0,    
                       subsample = 1,   
                       colsample_bytree = 1,  
                       min_child_weight = 12,
                       nrounds = 150)

  # To draw every decision tree in the boosted model, use the function below
  # (skipped here because there would be too many trees):
  # xgb.plot.tree(model = xgb.model) 

  # Prediction: shift the 0-based labels back to 1-based classes
  p <- predict(xgb.model, dtest)+1
  r <- test$group+1
  xgb_tb[[i]] <- table(p, r)
  success <- success + sum(p==r)
}
## [[1]]
##    r
## p    1  2  3  4
##   1 13  2  3  0
##   2  0 12  0  5
##   3  3  1 22  1
##   4  0  2  1 15
## 
## [[2]]
##    r
## p    1  2  3  4
##   1 12 11  0  2
##   2  1 10  0  7
##   3  2  2 12  0
##   4  5  3  1 12
## 
## [[3]]
##    r
## p    1  2  3  4
##   1 14  2  1  1
##   2  3  7  0  3
##   3  3  1 19  0
##   4  4  4  0 18
## 
## [[4]]
##    r
## p    1  2  3  4
##   1 13  5  2  2
##   2  5 14  1  3
##   3  1  0 16  0
##   4  1  3  0 14
## 
## [[5]]
##    r
## p    1  2  3  4
##   1 12  7  6  3
##   2  4  7  2  2
##   3  1  1 14  0
##   4  3  6  0 12
## 67 %

V. Conclusion

The table below gives the prediction accuracy on each difficulty level. As expected, accuracy drops as the data get harder.

On the multivariate normal data, however, the relatively simple models perform no worse than the complex ones.

LDA and Logistic Regression perform consistently well.

Random Forest is the best performer on every level except the level 2 data.

CART performs worst: a single tree struggles to separate the later, more complex datasets.

Prediction accuracy by difficulty level

          LDA       LR        LR (PCA)   CART      RF        XGB
Level 1   99 %      98 %      99.33 %    97.33 %   99.33 %   99 %
Level 2   90 %      89 %      87.67 %    80 %      88.33 %   86.67 %
Level 3   75.75 %   75 %      66.25 %    66 %      78.5 %    73 %
Level 4   71.5 %    71.25 %   50.25 %    56 %      73.75 %   67 %